PREPROCESSING SEQUENCES

Script to process ITS sequences from LMNP and IPNP

IMPORTING AND DEMULTIPLEXING SEQUENCES

IMPORTING TO QIIME2

  • Make directories for each library
mkdir ITS{1..9}
  • Loop to rename forward seqs
for i in ITS*; do mv $i/ITS_*1.fastq.gz $i/forward.fastq.gz; done
  • Loop to rename reverse seqs
for i in ITS*; do mv $i/ITS_*2.fastq.gz $i/reverse.fastq.gz; done
  • Loop to import files to QIIME2
for i in ITS*; do qiime tools import --type MultiplexedPairedEndBarcodeInSequence --input-path $i --output-path $i.qza; done
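The MultiplexedPairedEndBarcodeInSequence import format expects each library directory to contain exactly one forward.fastq.gz and one reverse.fastq.gz, which is why the files are renamed first. A hypothetical listing of one library directory after renaming:

ITS1/
├── forward.fastq.gz
└── reverse.fastq.gz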

DEMULTIPLEXING WITH CUTADAPT

The nine libraries differ only in their input artifact and barcode map, so one loop covers them all (the artifact names match the ITS$i.qza files created above):

for i in $(seq 1 9); do
  qiime cutadapt demux-paired \
    --i-seqs ITS$i.qza \
    --m-forward-barcodes-file ../../maps_its/its$i.txt \
    --m-forward-barcodes-column BarcodeSequence \
    --output-dir its${i}_demux
done

ITSXPRESS QIIME2

OPTION 1: MERGED SEQUENCES

  • Running itsxpress
for i in its*_demux ; do qiime itsxpress trim-pair --i-per-sample-sequences $i/per_sample_sequences.qza --p-region ITS2 --p-taxa F --o-trimmed itsxpress_2024_$i;done
  • Export sequences
for i in itsxpress_2024_its*; do qiime tools export --input-path $i --output-path exported; done
  • Import sequences together
qiime tools import  --type 'SampleData[SequencesWithQuality]'  --input-path manifest.txt  --output-path single-end-demux-qiime2.qza  --input-format SingleEndFastqManifestPhred33V2
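manifest.txt here is a QIIME2 V2 single-end manifest: a tab-separated table with sample-id and absolute-filepath columns. A minimal hypothetical example (sample IDs and paths assumed):

sample-id	absolute-filepath
sample01	/data/exported/sample01.fastq.gz
sample02	/data/exported/sample02.fastq.gz

One way to generate it from the exported directory (a sketch, assuming one fastq.gz per sample in exported/):

printf "sample-id\tabsolute-filepath\n" > manifest.txt
for f in exported/*.fastq.gz; do
  printf "%s\t%s\n" "$(basename "$f" .fastq.gz)" "$(readlink -f "$f")" >> manifest.txt
done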

OPTION 2: UNMERGED SEQUENCES

  • Running itsxpress
for i in its*_demux; do qiime itsxpress trim-pair-output-unmerged --i-per-sample-sequences $i/per_sample_sequences.qza --p-region ITS2 --p-taxa F --o-trimmed itsxpress_2024_unmerged_$i; done
  • Export sequences
for i in itsxpress_2024_unmerged*; do qiime tools export --input-path $i --output-path exported_$i; done
  • Import sequences together
qiime tools import \
  --type 'SampleData[PairedEndSequencesWithQuality]' \
  --input-path manifest.txt \
  --output-path paired-end-demux.qza \
  --input-format PairedEndFastqManifestPhred33V2
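For paired-end data, manifest.txt must follow the V2 paired-end manifest format, with a forward and a reverse path per sample. A minimal hypothetical example:

sample-id	forward-absolute-filepath	reverse-absolute-filepath
sample01	/data/exported/sample01_R1.fastq.gz	/data/exported/sample01_R2.fastq.gz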

ITSXPRESS STANDALONE

EXPORTING DATA

for i in its*_demux; do qiime tools export  --input-path  $i/per_sample_sequences.qza --output-path  exported_$i; done

OPTION 1: MERGED SEQUENCES

  • Move all data to one directory and run this bash script (chatGPT):

#!/bin/bash

# Set the path to the itsxpress executable
ITSXPRESS_EXECUTABLE="itsxpress"

# Loop through the files
for forward_file in *_R1_001.fastq.gz; do
    # Extract the sample name from the forward file
    sample_name=$(basename "$forward_file" _R1_001.fastq.gz)

    # Construct the reverse file name
    reverse_file="${sample_name}_R2_001.fastq.gz"

    # Run itsxpress command
    $ITSXPRESS_EXECUTABLE --fastq "$forward_file" --fastq2 "$reverse_file" --region ITS2 \
        --taxa Fungi --log "${sample_name}_logfile.txt" --outfile "${sample_name}_trimmed_reads.fastq.gz" --threads 2

    # Print a progress message for each sample pair
    echo "Processed $forward_file and $reverse_file"
done

OPTION 2: UNMERGED SEQUENCES

  • Move all data to one directory and run this bash script (chatGPT):

#!/bin/bash

# Set the path to the itsxpress executable
ITSXPRESS_EXECUTABLE="itsxpress"

# Loop through the files
for forward_file in *_R1_001.fastq.gz; do
    # Extract the sample name from the forward file
    sample_name=$(basename "$forward_file" _R1_001.fastq.gz)

    # Construct the reverse file name
    reverse_file="${sample_name}_R2_001.fastq.gz"

    # Run itsxpress command
    $ITSXPRESS_EXECUTABLE --fastq "$forward_file" --fastq2 "$reverse_file" --region ITS2 \
        --taxa Fungi --log "${sample_name}_logfile.txt" --outfile "${sample_name}_trimmed_R1.fastq.gz" --outfile2 "${sample_name}_trimmed_R2.fastq.gz" --threads 4

    # Print a progress message for each sample pair
    echo "Processed $forward_file and $reverse_file"
done

DADA2 AND OTUS IN QIIME2

IMPORTING STANDALONE DATA

qiime tools import\
  --type 'SampleData[SequencesWithQuality]'\
  --input-path manifest.txt\
  --output-path single-end-demux-standalone.qza\
  --input-format SingleEndFastqManifestPhred33V2

DADA2 FOR MERGED SEQUENCES

  • Run DADA2 on the two data sets.
for i in single-end-demux-*.qza; do qiime dada2 denoise-single --i-demultiplexed-seqs $i --p-trunc-len 0 --p-n-threads 4 --output-dir dada2_$i; done
  • Filter seqs by length (keep sequences of at least 50 bp)
qiime feature-table filter-seqs\
  --m-metadata-file dada2_single-end-demux-qiime2.qza/representative_sequences.qza\
  --i-data dada2_single-end-demux-qiime2.qza/representative_sequences.qza\
  --o-filtered-data representative_sequences_50_qiime2.qza\
  --p-where "length(sequence) > 49"

qiime feature-table filter-seqs\
  --m-metadata-file dada2_single-end-demux-standalone.qza/representative_sequences.qza\
  --i-data dada2_single-end-demux-standalone.qza/representative_sequences.qza\
  --o-filtered-data representative_sequences_50_standalone.qza\
  --p-where "length(sequence) > 49"
  • Filter table based on seqs
qiime feature-table filter-features\
  --i-table dada2_single-end-demux-qiime2.qza/table.qza\
  --m-metadata-file representative_sequences_50_qiime2.qza\
  --o-filtered-table table_50_qiime2.qza

qiime feature-table filter-features\
  --i-table dada2_single-end-demux-standalone.qza/table.qza\
  --m-metadata-file representative_sequences_50_standalone.qza\
  --o-filtered-table table_50_standalone.qza

DADA2 FOR UNMERGED SEQUENCES

 qiime dada2 denoise-paired --i-demultiplexed-seqs paired-end-demux.qza  --p-trunc-len-f 0 --p-trunc-len-r 0 --p-n-threads 4 --output-dir dada2_paired

Filtering by length was not necessary here because the minimum sequence length was 231 bp.
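The length distribution can be checked from the DADA2 representative sequences; a minimal sketch, assuming the output directory created above:

qiime feature-table tabulate-seqs \
  --i-data dada2_paired/representative_sequences.qza \
  --o-visualization rep-seqs-paired.qzv

The resulting visualization reports the minimum, maximum, and mean sequence lengths.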

DADA2 FOR UNMERGED SEQUENCES, STANDALONE ITSXPRESS

qiime tools import \
  --type 'SampleData[PairedEndSequencesWithQuality]' \
  --input-path manifest_unmerged.txt \
  --output-path paired-end-demux-standalone.qza \
  --input-format PairedEndFastqManifestPhred33V2

qiime dada2 denoise-paired \
  --i-demultiplexed-seqs paired-end-demux-standalone.qza \
  --p-trunc-len-f 0 --p-trunc-len-r 0 \
  --p-n-threads 4 \
  --output-dir dada2_paired_standalone

OTUS FOR MERGED SEQUENCES

For the qiime2-itsxpress data, we have to export the sequences and run:

for i in *.fastq.gz; do reformat.sh in=$i out=clean_$i minconsecutivebases=1; done

reformat.sh is part of [BBMap](https://github.com/BioInfoTools/BBMap/blob/master/sh/reformat.sh). This step works around an ITSxpress bug that generates zero-length sequences; minconsecutivebases=1 discards reads without at least one called base. Then import again and rerun the pipeline; the next steps are run on both the standalone and QIIME2 data in order to cluster into OTUs.
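A minimal re-import sketch for the cleaned reads, assuming a rebuilt manifest (manifest_clean.txt, hypothetical name) pointing at the clean_*.fastq.gz files:

qiime tools import \
  --type 'SampleData[SequencesWithQuality]' \
  --input-path manifest_clean.txt \
  --output-path single-end-demux-qiime2-reformat.qza \
  --input-format SingleEndFastqManifestPhred33V2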

  • Filter by q-score
for i in *.qza; do qiime quality-filter q-score --i-demux $i --output-dir filterqscore_$i; done
  • Dereplication
for i in filterqscore_*; do qiime vsearch dereplicate-sequences --i-sequences $i/filtered_sequences.qza --output-dir derep_$i;done
  • Clustering de novo
for i in derep_* ; do qiime vsearch cluster-features-de-novo --i-sequences $i/dereplicated_sequences.qza --i-table $i/dereplicated_table.qza --p-perc-identity 0.97 --p-threads 4 --output-dir cluster97_$i; done
  • Chimera checking and filter from table
for i in cluster97_*; do qiime vsearch uchime-denovo --i-sequences $i/clustered_sequences.qza --i-table $i/clustered_table.qza --output-dir chimera97_$i;done
qiime feature-table filter-features \
  --i-table cluster97_derep_filterqscore_single-end-demux-qiime2-reformat.qza/clustered_table.qza \
  --m-metadata-file chimera97_cluster97_derep_filterqscore_single-end-demux-qiime2-reformat.qza/nonchimeras.qza \
  --o-filtered-table chimera97_cluster97_derep_filterqscore_single-end-demux-qiime2-reformat.qza/table97-nonchimeras-qiime2.qza

qiime feature-table filter-features \
  --i-table cluster97_derep_filterqscore_single-end-demux-standalone.qza/clustered_table.qza \
  --m-metadata-file chimera97_cluster97_derep_filterqscore_single-end-demux-standalone.qza/nonchimeras.qza \
  --o-filtered-table chimera97_cluster97_derep_filterqscore_single-end-demux-standalone.qza/table97-nonchimeras-standalone.qza
  • Filtering singletons from table and seqs (optional)
for i in chimera97_*; do qiime feature-table filter-features --i-table $i/table*.qza --p-min-frequency 2 --o-filtered-table $i/filtered-table.qza; done   # output name matches the find pattern below
for i in chimera*/; do
    data_file=$(find $i -name 'nonchimeras*' -type f)
    table_file=$(find $i -name 'filtered-table*' -type f)

    qiime feature-table filter-seqs \
        --i-data $data_file \
        --i-table $table_file \
        --o-filtered-data ${data_file%.qza}-filtered.qza
done
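As an optional sanity check, feature-table summarize reports feature and sample counts for the final tables from both pipelines (a sketch, assuming the filtered-table.qza names produced above):

for i in chimera97_*; do qiime feature-table summarize --i-table $i/filtered-table.qza --o-visualization $i/filtered-table.qzv; done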

DADA2 IN R

OPTION 1: MERGED SEQUENCES

  • Load libraries and check versions
library(dada2)
packageVersion("dada2")
library(ShortRead)
packageVersion("ShortRead")
library(Biostrings)
packageVersion("Biostrings")
  • Path where the sequences are saved
path <- "../ITS_corredor/HN00174206/data_REANAL_2024_LAST/ITSxpress_STANDALONE/ITSxpress_merged/"  ## CHANGE ME to the directory containing the fastq files.
head(list.files(path))
  • List of files
fnFs <- sort(list.files(path, pattern = "_L001_trimmed_reads.fastq.gz", full.names = TRUE))
  • Primers
FWD <- "AACTTTYRRCAAYGGATCWCT"  ## CHANGE ME to your forward primer sequence
REV <- "AGCCTCCGCTTATTGATATGCTTAART"  ## CHANGE ME...
  • Check for primers (they should already be absent, since ITSxpress removes them)
allOrients <- function(primer) {
    # Create all orientations of the input sequence
    require(Biostrings)
    dna <- DNAString(primer)  # Biostrings works with DNAString objects rather than character vectors
    orients <- c(Forward = dna, Complement = Biostrings::complement(dna), Reverse = Biostrings::reverse(dna),
        RevComp = Biostrings::reverseComplement(dna))
    return(sapply(orients, toString))  # Convert back to character vector
}
FWD.orients <- allOrients(FWD)
REV.orients <- allOrients(REV)
FWD.orients
fnFs.filtN <- file.path(path, "filtN", basename(fnFs)) # Put N-filtered files in filtN/ subdirectory
#filterAndTrim(fnFs, fnFs.filtN, fnRs, fnRs.filtN, maxN = 0, multithread = TRUE)
filterAndTrim(fnFs, fnFs.filtN, maxN = 0, multithread = TRUE)
primerHits <- function(primer, fn) {
    # Counts number of reads in which the primer is found
    nhits <- vcountPattern(primer, sread(readFastq(fn)), fixed = FALSE)
    return(sum(nhits > 0))
}
rbind(FWD.ForwardReads = sapply(FWD.orients, primerHits, fn = fnFs.filtN[[1]]),
      REV.ForwardReads = sapply(REV.orients, primerHits, fn = fnFs.filtN[[1]]))
# No reverse-read checks here: the itsxpress-merged data is single-end
  • Quality plot
plotQualityProfile(fnFs[1:2])
#plotQualityProfile(cutRs[1:2])
  • Filter by quality
filtFs <- file.path(path, "filtered", basename(fnFs))
out <- filterAndTrim(fnFs, filtFs, maxN = 0, maxEE = 2, truncQ = 2,
    minLen = 50, rm.phix = TRUE, compress = TRUE, multithread = TRUE)  # on windows, set multithread = FALSE
head(out)
  • Learn the error model
errF <- learnErrors(filtFs, multithread = TRUE)
#plotErrors(errF, nominalQ = TRUE)
  • Dereplication
derepFs <- derepFastq(filtFs, verbose = TRUE)
  • Inference of ASVs
dadaFs <- dada(derepFs, err = errF, multithread = TRUE)
#dadaFs2 <- dada(derepFs, err = errF, multithread = TRUE, OMEGA_A = 1e-40, pool = "pseudo")
  • Sequences table
seqtab <- makeSequenceTable(dadaFs)
#seqtab2 <- makeSequenceTable(dadaFs2)
dim(seqtab)
  • Chimera removal
seqtab_nochim <- removeBimeraDenovo(seqtab, method = "consensus", multithread = T, 
                                    verbose = T)
#seqtab_nochim2 <- removeBimeraDenovo(seqtab2, method = "consensus", multithread = T, 
 #                                   verbose = T)
dim(seqtab_nochim)
  • Denoising statistics
getN <- function(x) sum(getUniques(x))
# ?getUniques
stats <- cbind(out, sapply(dadaFs, getN), 
               rowSums(seqtab_nochim))

colnames(stats) <- c("input", "filtered", "denoisedF",  "nonchim")

head(stats)
  • Sequence statistics (chatGPT)
library(tidyverse)
# Get the column names of the sequence table (the sequences themselves)
seqs <- colnames(seqtab)

# Function to count letters in a string
contar_letras <- function(cadena) {
  str_count(cadena, "[A-Za-z]")
}

# Apply the counting function to each sequence
conteo_letras <- map_int(seqs, ~ contar_letras(.x))

#print(conteo_letras)
min(conteo_letras)
max(conteo_letras)
mean(conteo_letras)
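Since the sequences are plain A/C/G/T strings, base R gives the same length summary in one line; an equivalent sketch:

summary(nchar(seqs))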
  • Assign taxonomy
# ---------- Taxonomic assignment ----------
ruta_clasificador <- "../ITS_corredor/HN00174206/data_REANAL_2024_LAST/sh_general_release_dynamic_18.07.2023.fasta" # from the UNITE website
taxa <- assignTaxonomy(seqtab_nochim, ruta_clasificador, multithread = TRUE, tryRC = TRUE)
#taxa2 <- assignTaxonomy(seqtab_nochim2, ruta_clasificador, multithread = TRUE, tryRC = TRUE)  # only if the alternative seqtab_nochim2 run above is used

# Inspect what was generated by the assignment
taxa_print <- taxa
rownames(taxa_print) <- NULL

head(taxa_print)
dim(taxa_print)
  • Export data
save(seqtab_nochim, taxa,
     file = "dada2_results2.RData")
write.csv(taxa, "taxonomy.csv")
seqtab_final <- as.data.frame(seqtab_nochim)  # build the table object before exporting it
#write.csv(seqtab_final, "table_final.csv", row.names = TRUE)
write_tsv(seqtab_final %>% rownames_to_column("#SampleID"), "table_final.txt")

write.csv(stats, "stats.csv")

#remotes::install_github("vmikk/metagMisc")
library(metagMisc)
dada_to_fasta(seqtab = seqtab, out = "seqtab.fasta")
#dada_to_fasta(seqtab = seqtab2, out = "seqtab2.fasta")

OPTION 2: UNMERGED SEQUENCES

  • Load libraries and check versions
library(dada2)
packageVersion("dada2")
library(ShortRead)
packageVersion("ShortRead")
library(Biostrings)
packageVersion("Biostrings")
  • Path where the sequences are saved
path <- "../ITS_corredor/HN00174206/data_REANAL_2024_LAST/ITS_STANDALONE/"  ## CHANGE ME to the directory containing the fastq files.
head(list.files(path))
  • List of files
fnFs <- sort(list.files(path, pattern = "_R1.fastq.gz", full.names = TRUE))
fnRs <- sort(list.files(path, pattern = "_R2.fastq.gz", full.names = TRUE))
  • Primers
FWD <- "AACTTTYRRCAAYGGATCWCT"  ## CHANGE ME to your forward primer sequence
REV <- "AGCCTCCGCTTATTGATATGCTTAART"  ## CHANGE ME...
  • Check for primers (they should already be absent, since ITSxpress removes them)
allOrients <- function(primer) {
    # Create all orientations of the input sequence
    require(Biostrings)
    dna <- DNAString(primer)  # Biostrings works with DNAString objects rather than character vectors
    orients <- c(Forward = dna, Complement = Biostrings::complement(dna), Reverse = Biostrings::reverse(dna),
        RevComp = Biostrings::reverseComplement(dna))
    return(sapply(orients, toString))  # Convert back to character vector
}
FWD.orients <- allOrients(FWD)
REV.orients <- allOrients(REV)
FWD.orients
fnFs.filtN <- file.path(path, "filtN", basename(fnFs)) # Put N-filtered files in filtN/ subdirectory
fnRs.filtN <- file.path(path, "filtN", basename(fnRs))
filterAndTrim(fnFs, fnFs.filtN, fnRs, fnRs.filtN, maxN = 0, multithread = TRUE)
primerHits <- function(primer, fn) {
    # Counts number of reads in which the primer is found
    nhits <- vcountPattern(primer, sread(readFastq(fn)), fixed = FALSE)
    return(sum(nhits > 0))
}
rbind(FWD.ForwardReads = sapply(FWD.orients, primerHits, fn = fnFs.filtN[[1]]), FWD.ReverseReads = sapply(FWD.orients,
    primerHits, fn = fnRs.filtN[[1]]), REV.ForwardReads = sapply(REV.orients, primerHits,
    fn = fnFs.filtN[[1]]), REV.ReverseReads = sapply(REV.orients, primerHits, fn = fnRs.filtN[[1]]))
  • Remove primers
#cutadapt <- "C:/Users/HP/AppData/Local/Programs/Python/Python312/Scripts/cutadapt.exe" # CHANGE ME to the cutadapt path on your machine
#system2(cutadapt, args = "--version") # Run shell commands from R

# Define the path to the Python launcher
python_launcher <- "py.exe"

# Run cutadapt from R
output <- system2(python_launcher, args = c("-m", "cutadapt", "--version"), stdout = TRUE)
print(output)


path.cut <- file.path(path, "cutadapt")
if(!dir.exists(path.cut)) dir.create(path.cut)
fnFs.cut <- file.path(path.cut, basename(fnFs))
fnRs.cut <- file.path(path.cut, basename(fnRs))

FWD.RC <- dada2:::rc(FWD)
REV.RC <- dada2:::rc(REV)
# Trim FWD and the reverse-complement of REV off of R1 (forward reads)
R1.flags <- paste("-g", FWD, "-a", REV.RC) 
# Trim REV and the reverse-complement of FWD off of R2 (reverse reads)
R2.flags <- paste("-G", REV, "-A", FWD.RC)
# Run Cutadapt
for(i in seq_along(fnFs)) {
  system2(python_launcher, args = c("-m", "cutadapt", R1.flags, R2.flags, "-n", 2, # -n 2 required to remove FWD and REV from reads
                             "-o", fnFs.cut[i], "-p", fnRs.cut[i],  # output files
                             fnFs.filtN[i], fnRs.filtN[i],          # input files
                             "--minimum-length=1"))
}
rbind(FWD.ForwardReads = sapply(FWD.orients, primerHits, fn = fnFs.cut[[1]]), FWD.ReverseReads = sapply(FWD.orients,
    primerHits, fn = fnRs.cut[[1]]), REV.ForwardReads = sapply(REV.orients, primerHits,
    fn = fnFs.cut[[1]]), REV.ReverseReads = sapply(REV.orients, primerHits, fn = fnRs.cut[[1]]))



# Forward and reverse fastq filenames have the format SAMPLENAME_R1.fastq.gz and SAMPLENAME_R2.fastq.gz:
cutFs <- sort(list.files(path.cut, pattern = "_R1.fastq.gz", full.names = TRUE))
cutRs <- sort(list.files(path.cut, pattern = "_R2.fastq.gz", full.names = TRUE))

# Extract sample names, assuming filenames have the format SAMPLENAME_XXX.fastq.gz:
get.sample.name <- function(fname) strsplit(basename(fname), "_")[[1]][1]
sample.names <- unname(sapply(cutFs, get.sample.name))
head(sample.names)
  • Quality plot
plotQualityProfile(cutFs[1:2])
plotQualityProfile(cutRs[1:2])
  • Filter by quality
filtFs <- file.path(path.cut, "filtered", basename(cutFs))
filtRs <- file.path(path.cut, "filtered", basename(cutRs))
out <- filterAndTrim(cutFs, filtFs, cutRs, filtRs, maxN = 0, maxEE = c(2, 2), truncQ = 2,
    minLen = 50, rm.phix = TRUE, compress = TRUE, multithread = TRUE)  # on windows, set multithread = FALSE
head(out)
  • Learn the error models
errF <- learnErrors(filtFs, multithread = TRUE)
errR <- learnErrors(filtRs, multithread = TRUE)

#plotErrors(errF, nominalQ = TRUE)
  • Dereplication
derepFs <- derepFastq(filtFs, verbose = TRUE)
derepRs <- derepFastq(filtRs, verbose = TRUE)
  • Inference of ASVs
dadaFs <- dada(derepFs, err = errF, multithread = TRUE)
dadaRs <- dada(derepRs, err = errR, multithread = TRUE)  # use the reverse error model, not errF
  • Merge
mergers <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, verbose=TRUE)
  • Build the sequence table
seqtab <- makeSequenceTable(mergers)
dim(seqtab)
  • Chimera removal
seqtab_nochim <- removeBimeraDenovo(seqtab, method = "consensus", multithread = T, 
                                    verbose = T)
dim(seqtab_nochim)
  • Denoising statistics
getN <- function(x) sum(getUniques(x))
track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN),
    rowSums(seqtab_nochim))
# If processing a single sample, remove the sapply calls: e.g. replace
# sapply(dadaFs, getN) with getN(dadaFs)
colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim")
rownames(track) <- sample.names
head(track)
  • Sequence statistics (chatGPT)
library(tidyverse)
# Get the column names of the sequence table (the sequences themselves)
seqs <- colnames(seqtab)

# Function to count letters in a string
contar_letras <- function(cadena) {
  str_count(cadena, "[A-Za-z]")
}

# Apply the counting function to each sequence
conteo_letras <- map_int(seqs, ~ contar_letras(.x))

#print(conteo_letras)
min(conteo_letras)
max(conteo_letras)
mean(conteo_letras)
  • Assign taxonomy
# ---------- Taxonomic assignment ----------
ruta_clasificador <- "../ITS_corredor/HN00174206/data_REANAL_2024_LAST/sh_general_release_dynamic_18.07.2023.fasta" # from the UNITE website
taxa <- assignTaxonomy(seqtab_nochim, ruta_clasificador, multithread = TRUE, tryRC = TRUE)

# Inspect what was generated by the assignment
taxa_print <- taxa
rownames(taxa_print) <- NULL

head(taxa_print)
dim(taxa_print)

  • Export the objects generated during preprocessing

save(seqtab_nochim, taxa,
     file = "dada2_results_paired_final.RData")

# Build the final table with cleaned sample IDs before exporting it
seqtab_final <- as.data.frame(seqtab_nochim) %>%
  rownames_to_column(var = "ids") %>%
  mutate(ids = str_extract(ids, "[0-9]+[A-Z]+")) %>%
  column_to_rownames(var = "ids")

write_tsv(seqtab_final %>% rownames_to_column("#SampleID"), "table3_no_hasids.txt")

#remotes::install_github("vmikk/metagMisc")
library(metagMisc)
dada_to_fasta(seqtab = seqtab, out = "seqtab3.fasta")

RESULTS FROM DIFFERENT METHODS

Statistics

ALPHA DIVERSITY

Figure 2. Alpha diversity across all orders q

Figure 3. Alpha diversity per polygon at order q=0

Figure 4. Alpha diversity per polygon at order q=1

Figure 5. Alpha diversity per polygon at order q=2

PCA plots